Preparations

Load the necessary libraries

library(car)       #for regression diagnostics
library(broom)     #for tidy output
library(ggfortify) #for model diagnostics
library(sjPlot)    #for outputs
library(knitr)     #for kable
library(effects)   #for partial effects plots
library(emmeans)   #for estimating marginal means
library(ggeffects) #for partial effects plots
library(MASS)      #for glm.nb
library(MuMIn)     #for AICc
library(DHARMa)    #for residual diagnostics plots
library(modelr)    #for auxillary modelling functions
library(performance) #for residuals diagnostics
library(see)         #for plotting residuals
library(patchwork)   #for grids of plots
library(tidyverse) #for data wrangling

Scenario

Loyn (1987) modeled the abundance of forest birds with six predictor variables (patch area, distance to nearest patch, distance to nearest larger patch, grazing intensity, altitude and years since the patch had been isolated).

Regent honeyeater

Format of loyn.csv data file

ABUND DIST LDIST AREA GRAZE ALT YR.ISOL
.. .. .. .. .. .. ..
ABUND Abundance of forest birds in patch- response variable
DIST Distance to nearest patch - predictor variable
LDIST Distance to nearest larger patch - predictor variable
AREA Size of the patch - predictor variable
GRAZE Grazing intensity (1 to 5, representing light to heavy) - predictor variable
ALT Altitude - predictor variable
YR.ISOL Number of years since the patch was isolated - predictor variable

The aim of the analysis is to investigate the effects of a range of predictors on the abundance of forest birds.

Read in the data

loyn = read_csv('../public/data/loyn.csv', trim_ws=TRUE)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   ABUND = col_double(),
##   AREA = col_double(),
##   YR.ISOL = col_double(),
##   DIST = col_double(),
##   LDIST = col_double(),
##   GRAZE = col_double(),
##   ALT = col_double()
## )
glimpse(loyn)
## Rows: 56
## Columns: 7
## $ ABUND   <dbl> 5.3, 2.0, 1.5, 17.1, 13.8, 14.1, 3.8, 2.2, 3.3, 3.0, 27.6, 1.…
## $ AREA    <dbl> 0.1, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2…
## $ YR.ISOL <dbl> 1968, 1920, 1900, 1966, 1918, 1965, 1955, 1920, 1965, 1900, 1…
## $ DIST    <dbl> 39, 234, 104, 66, 246, 234, 467, 284, 156, 311, 66, 93, 39, 4…
## $ LDIST   <dbl> 39, 234, 311, 66, 246, 285, 467, 1829, 156, 571, 332, 93, 39,…
## $ GRAZE   <dbl> 2, 5, 5, 3, 5, 3, 5, 5, 4, 5, 3, 5, 2, 1, 5, 5, 3, 3, 3, 2, 2…
## $ ALT     <dbl> 160, 60, 140, 160, 140, 130, 90, 60, 130, 130, 210, 160, 210,…

Exploratory data analysis

Fit the model

Model formula: \[ y_i \sim{} \mathcal{N}(\mu_i, \sigma^2)\\ log(\mu_i) = \boldsymbol{\beta} \bf{X_i} \]

where \(\boldsymbol{\beta}\) is a vector of effects parameters and \(\bf{X}\) is a model matrix representing the additive effects of the scaled versions of distance (ln), distance to the nearest large patch (ln), patch area (ln), grazing intensity, year of isolation and altitude on the abundance of forest birds.

Model validation

Partial plots

Caterpillar plot

plot_model(loyn.glm,  type='est')

plot_model(loyn.glm,  type='est', transform='exp', show.values=TRUE)

Model investigation / hypothesis testing

Further analyses

Summary figures

## Using emmeans
loyn.grid <- with(loyn,  list(fGRAZE=levels(fGRAZE),
                              AREA = seq(min(AREA),  max(AREA),  len=100)))

#newdata = with(loyn,  list(fGRAZE=levels(fGRAZE),
#                             AREA=seq_range(AREA,  n=100))
newdata = emmeans(loyn.glm2,  ~AREA|fGRAZE,  at=loyn.grid,  type='response') %>%
  as.data.frame
## OR
#newdata = emmeans(loyn.lm3, ~AREA|fGRAZE,
#        at=list(fGRAZE=levels(loyn$fGRAZE),
#                AREA=seq(min(loyn$AREA), max(loyn$AREA), len=100))) %>%
#    as.data.frame
head(newdata)
ggplot(newdata, aes(y=response, x=AREA, color=fGRAZE, fill=fGRAZE)) +
  geom_point(data=loyn,  aes(y=ABUND,  color=fGRAZE)) +
  geom_ribbon(aes(ymin=asymp.LCL, ymax=asymp.UCL), color=NA, alpha=0.3) +
  geom_line() +
  scale_x_log10(labels=scales::comma)+
  scale_y_log10('Abundance') +
  theme_classic()

If we want to limit the range of predictions within each Grazing level…

loyn.grid = with(loyn,  list(fGRAZE=levels(fGRAZE),
                             AREA=as.vector(seq(1, 10, len=10) %o% 10^(-1:3)) %>% unique))
                             

newdata = emmeans(loyn.glm2, ~AREA|fGRAZE,
                  at=loyn.grid,
                  type='response') %>%
    as.data.frame
head(newdata)
loyn.limits = loyn %>%
  group_by(fGRAZE) %>%
  summarise(Min=min(AREA),  Max=max(AREA))
## `summarise()` ungrouping output (override with `.groups` argument)
newdata1 = newdata %>%
  full_join(loyn.limits) %>%
  filter(AREA>=Min,  AREA<=Max)
## Joining, by = "fGRAZE"
head(newdata1)
ggplot(newdata1, aes(y=response, x=AREA, color=fGRAZE, fill=fGRAZE)) +
    geom_ribbon(aes(ymin=asymp.LCL, ymax=asymp.UCL), color=NA, alpha=0.3) +
    geom_line() +
    scale_x_log10(labels=scales::comma)+
    scale_y_log10('Abundance') +
  theme_classic() +
  scale_color_viridis_d() +
  scale_fill_viridis_d() +
  geom_point(data=loyn,  aes(y=ABUND,  color=fGRAZE))

References

Loyn, R. H. 1987. “Nature Conservation: The Role of Remnants of Native Vegetation.” In, edited by D. A. Saunders, G. W. Arnold, A. A. Burbridge, and A. J. M. Hopkins. Chipping Norton, NSW: Surrey Beatty & Sons.